Tarea 5¶

  • La data proviene de datos abiertos del Minsa
In [20]:
import pandas as pd

# Leer el archivo CSV
data = pd.read_csv("data_PC.csv")
In [21]:
# cambio el nombre de MI DATA, para evitar errores
data.rename(columns={'PROVINCIA': 'provincia'}, inplace=True)
In [22]:
data.rename(columns={'DEPARTAMENTO': 'departamento'}, inplace=True)
In [23]:
# 
data.info
Out[23]:
<bound method DataFrame.info of           ANO departamento  provincia           DISTRITO  UBIGEO  EDAD  \
0        2022       TUMBES     TUMBES             TUMBES  240101    46   
1        2023         LIMA       LIMA        JESUS MARIA  150113    69   
2        2024   SAN MARTIN  MOYOBAMBA          MOYOBAMBA  220101    55   
3        2023     AREQUIPA   CAYLLOMA          COPORAQUE   40506    50   
4        2022         LIMA       LIMA        JESUS MARIA  150113    58   
...       ...          ...        ...                ...     ...   ...   
1741994  2023        JUNIN   HUANCAYO          PILCOMAYO  120125    87   
1741995  2023         LIMA       LIMA  VILLA EL SALVADOR  150142    85   
1741996  2022         LIMA       LIMA          LA MOLINA  150114    38   
1741997  2023        JUNIN   HUANCAYO           HUANCAYO  120101    48   
1741998  2023     AREQUIPA   AREQUIPA           AREQUIPA   40101    71   

              SEXO  
0         FEMENINO  
1         FEMENINO  
2         FEMENINO  
3        MASCULINO  
4        MASCULINO  
...            ...  
1741994  MASCULINO  
1741995  MASCULINO  
1741996  MASCULINO  
1741997   FEMENINO  
1741998   FEMENINO  

[1741999 rows x 7 columns]>
In [24]:
# ver

data.head()
Out[24]:
ANO departamento provincia DISTRITO UBIGEO EDAD SEXO
0 2022 TUMBES TUMBES TUMBES 240101 46 FEMENINO
1 2023 LIMA LIMA JESUS MARIA 150113 69 FEMENINO
2 2024 SAN MARTIN MOYOBAMBA MOYOBAMBA 220101 55 FEMENINO
3 2023 AREQUIPA CAYLLOMA COPORAQUE 40506 50 MASCULINO
4 2022 LIMA LIMA JESUS MARIA 150113 58 MASCULINO
In [25]:
# years in data

data.ANO.value_counts()
Out[25]:
ANO
2022    782448
2020    509522
2021    411392
2023     30544
2024      8093
Name: count, dtype: int64
  • comenzamos a modificar la data para el análisis, se cuenta según el UBIGEO en este caso
In [26]:
#Agrupación básica de registros por año y ubicación:

indexList = ['ANO', 'departamento', 'provincia']
aggregator = {'DISTRITO': 'count'}
covid_provYear = data.groupby(indexList,observed=True).agg(aggregator)
covid_provYear.rename(columns={'DISTRITO': 'conteo_casos'}, inplace=True)
covid_provYear
Out[26]:
conteo_casos
ANO departamento provincia
2020 AMAZONAS BAGUA 4299
BONGARA 191
CHACHAPOYAS 1060
CONDORCANQUI 2173
LUYA 234
... ... ... ...
2024 TUMBES CONTRALMIRANTE VILLAR 1
TUMBES 17
ZARUMILLA 3
UCAYALI CORONEL PORTILLO 33
PADRE ABAD 1

931 rows × 1 columns

In [27]:
#Añadir columnas para un análisis porcentual

covid_provYear['CASOS_pct'] = covid_provYear['conteo_casos'] / covid_provYear['conteo_casos'].sum()
In [28]:
#Reorganizar el DataFrame por años:

covid_provYear_wide = covid_provYear.unstack('ANO').fillna(0)
covid_provYear_wide.columns = ['{}_{}'.format(col[0], col[1]) for col in covid_provYear_wide.columns]
covid_provYear_wide.reset_index(inplace=True)
covid_provYear_wide
Out[28]:
departamento provincia conteo_casos_2020 conteo_casos_2021 conteo_casos_2022 conteo_casos_2023 conteo_casos_2024 CASOS_pct_2020 CASOS_pct_2021 CASOS_pct_2022 CASOS_pct_2023 CASOS_pct_2024
0 AMAZONAS BAGUA 4299.0 1181.0 1790.0 41.0 20.0 0.002468 0.000678 0.001028 2.353618e-05 1.148106e-05
1 AMAZONAS BONGARA 191.0 329.0 387.0 12.0 16.0 0.000110 0.000189 0.000222 6.888638e-06 9.184850e-06
2 AMAZONAS CHACHAPOYAS 1060.0 1566.0 2884.0 61.0 42.0 0.000608 0.000899 0.001656 3.501724e-05 2.411023e-05
3 AMAZONAS CONDORCANQUI 2173.0 199.0 246.0 3.0 0.0 0.001247 0.000114 0.000141 1.722159e-06 0.000000e+00
4 AMAZONAS LUYA 234.0 381.0 480.0 23.0 3.0 0.000134 0.000219 0.000276 1.320322e-05 1.722159e-06
... ... ... ... ... ... ... ... ... ... ... ... ...
191 TUMBES ZARUMILLA 950.0 538.0 550.0 23.0 3.0 0.000545 0.000309 0.000316 1.320322e-05 1.722159e-06
192 UCAYALI ATALAYA 309.0 71.0 35.0 0.0 0.0 0.000177 0.000041 0.000020 0.000000e+00 0.000000e+00
193 UCAYALI CORONEL PORTILLO 9172.0 3110.0 4425.0 66.0 33.0 0.005265 0.001785 0.002540 3.788751e-05 1.894375e-05
194 UCAYALI PADRE ABAD 990.0 479.0 510.0 13.0 1.0 0.000568 0.000275 0.000293 7.462691e-06 5.740531e-07
195 UCAYALI PURUS 224.0 28.0 24.0 1.0 0.0 0.000129 0.000016 0.000014 5.740531e-07 0.000000e+00

196 rows × 12 columns

In [29]:
# dengue_provYear_Alarm_w.columns
In [30]:
# dengue_provYear_Alarm_w.columns=['year'+str(x) for x in dengue_provYear_Alarm_w.columns]
In [31]:
# as usual
# dengue_provYear_Alarm_w.reset_index(inplace=True)
# dengue_provYear_Alarm_w
  • se comienza con el mapa
In [32]:
#mapaaa

mapLink='https://github.com/SocialAnalytics-StrategicIntelligence/GeoDF_Analytics/raw/main/maps/ProvsINEI2023.zip'

import geopandas as gpd

provmap=gpd.read_file(mapLink)

provmap.info()
<class 'geopandas.geodataframe.GeoDataFrame'>
RangeIndex: 196 entries, 0 to 195
Data columns (total 6 columns):
 #   Column      Non-Null Count  Dtype   
---  ------      --------------  -----   
 0   OBJECTID    196 non-null    float64 
 1   CCDD        196 non-null    object  
 2   CCPP        196 non-null    object  
 3   DEPARTAMEN  196 non-null    object  
 4   PROVINCIA   196 non-null    object  
 5   geometry    196 non-null    geometry
dtypes: float64(1), geometry(1), object(4)
memory usage: 9.3+ KB
In [33]:
#

provmap['location']=['+'.join(x[0]) for x in zip(provmap.iloc[:,3:5].values)]
provmap.head(10)
Out[33]:
OBJECTID CCDD CCPP DEPARTAMEN PROVINCIA geometry location
0 1.0 01 01 AMAZONAS CHACHAPOYAS POLYGON ((-77.72614 -5.94354, -77.72486 -5.943... AMAZONAS+CHACHAPOYAS
1 2.0 01 02 AMAZONAS BAGUA POLYGON ((-78.61909 -4.51001, -78.61802 -4.510... AMAZONAS+BAGUA
2 3.0 01 03 AMAZONAS BONGARA POLYGON ((-77.72759 -5.1403, -77.72361 -5.1406... AMAZONAS+BONGARA
3 4.0 01 04 AMAZONAS CONDORCANQUI POLYGON ((-77.81399 -2.99278, -77.81483 -2.995... AMAZONAS+CONDORCANQUI
4 5.0 01 05 AMAZONAS LUYA POLYGON ((-78.13023 -5.9037, -78.13011 -5.9041... AMAZONAS+LUYA
5 6.0 01 06 AMAZONAS RODRIGUEZ DE MENDOZA POLYGON ((-77.44452 -6.05002, -77.44387 -6.050... AMAZONAS+RODRIGUEZ DE MENDOZA
6 7.0 01 07 AMAZONAS UTCUBAMBA POLYGON ((-78.09288 -5.36258, -78.09288 -5.364... AMAZONAS+UTCUBAMBA
7 8.0 02 01 ANCASH HUARAZ POLYGON ((-77.3987 -9.35563, -77.39852 -9.3560... ANCASH+HUARAZ
8 9.0 02 02 ANCASH AIJA POLYGON ((-77.61368 -9.649, -77.61241 -9.64975... ANCASH+AIJA
9 10.0 02 03 ANCASH ANTONIO RAYMONDI POLYGON ((-77.08856 -8.97496, -77.08804 -8.975... ANCASH+ANTONIO RAYMONDI
In [34]:
#

covid_provYear_wide['location']=['+'.join(x[0]) for x in zip(covid_provYear_wide.iloc[:,:2].values)]
covid_provYear_wide.head()
Out[34]:
departamento provincia conteo_casos_2020 conteo_casos_2021 conteo_casos_2022 conteo_casos_2023 conteo_casos_2024 CASOS_pct_2020 CASOS_pct_2021 CASOS_pct_2022 CASOS_pct_2023 CASOS_pct_2024 location
0 AMAZONAS BAGUA 4299.0 1181.0 1790.0 41.0 20.0 0.002468 0.000678 0.001028 0.000024 0.000011 AMAZONAS+BAGUA
1 AMAZONAS BONGARA 191.0 329.0 387.0 12.0 16.0 0.000110 0.000189 0.000222 0.000007 0.000009 AMAZONAS+BONGARA
2 AMAZONAS CHACHAPOYAS 1060.0 1566.0 2884.0 61.0 42.0 0.000608 0.000899 0.001656 0.000035 0.000024 AMAZONAS+CHACHAPOYAS
3 AMAZONAS CONDORCANQUI 2173.0 199.0 246.0 3.0 0.0 0.001247 0.000114 0.000141 0.000002 0.000000 AMAZONAS+CONDORCANQUI
4 AMAZONAS LUYA 234.0 381.0 480.0 23.0 3.0 0.000134 0.000219 0.000276 0.000013 0.000002 AMAZONAS+LUYA

Preprocessing¶

In [35]:
import unidecode


byePunctuation=lambda x: unidecode.unidecode(x)
covid_provYear_wide['location']=covid_provYear_wide['location'].apply(byePunctuation)
provmap['location']=provmap['location'].apply(byePunctuation)
In [36]:
# replacing dashes and multiple spaces by a simple space

covid_provYear_wide['location']=covid_provYear_wide.location.str.replace("\-|\_|\s+","",regex=True)
provmap['location']=provmap.location.str.replace("\-|\_|\s+","",regex=True)

Merging¶

In [37]:
# match o no?

nomatch_df=set(covid_provYear_wide.location)- set(provmap.location)
nomatch_gdf=set(provmap.location)-set(covid_provYear_wide.location)
In [38]:
#

len(nomatch_df), len(nomatch_gdf)
Out[38]:
(2, 2)
In [39]:
# pick the closest match from nomatch_gdf for a value in nomatch_df

from thefuzz import process

[(dis,process.extractOne(dis,nomatch_gdf)) for dis in sorted(nomatch_df)]
Out[39]:
[('ANCASH+ANTONIORAIMONDI', ('ANCASH+ANTONIORAYMONDI', 95)),
 ('ICA+NAZCA', ('ICA+NASCA', 89))]
In [40]:
# is this OK?

{dis:process.extractOne(dis,nomatch_gdf)[0] for dis in sorted(nomatch_df)}
Out[40]:
{'ANCASH+ANTONIORAIMONDI': 'ANCASH+ANTONIORAYMONDI', 'ICA+NAZCA': 'ICA+NASCA'}
In [41]:
# then:

changesinDF={dis:process.extractOne(dis,nomatch_gdf)[0] for dis in sorted(nomatch_df)}
In [44]:
covid_provYear_wide.replace({'location': changesinDF}, inplace=True)
In [45]:
nomatch_df=set(covid_provYear_wide.location)- set(provmap.location)
nomatch_gdf=set(provmap.location)-set(covid_provYear_wide.location)

[(dis,process.extractOne(dis,nomatch_gdf)) for dis in sorted(nomatch_df)]
Out[45]:
[]
In [46]:
covid_provYear_map=provmap.merge(covid_provYear_wide, on='location',how='left',indicator='flag')
In [47]:
# check

covid_provYear_map.info()
<class 'geopandas.geodataframe.GeoDataFrame'>
RangeIndex: 196 entries, 0 to 195
Data columns (total 20 columns):
 #   Column             Non-Null Count  Dtype   
---  ------             --------------  -----   
 0   OBJECTID           196 non-null    float64 
 1   CCDD               196 non-null    object  
 2   CCPP               196 non-null    object  
 3   DEPARTAMEN         196 non-null    object  
 4   PROVINCIA          196 non-null    object  
 5   geometry           196 non-null    geometry
 6   location           196 non-null    object  
 7   departamento       196 non-null    object  
 8   provincia          196 non-null    object  
 9   conteo_casos_2020  196 non-null    float64 
 10  conteo_casos_2021  196 non-null    float64 
 11  conteo_casos_2022  196 non-null    float64 
 12  conteo_casos_2023  196 non-null    float64 
 13  conteo_casos_2024  196 non-null    float64 
 14  CASOS_pct_2020     196 non-null    float64 
 15  CASOS_pct_2021     196 non-null    float64 
 16  CASOS_pct_2022     196 non-null    float64 
 17  CASOS_pct_2023     196 non-null    float64 
 18  CASOS_pct_2024     196 non-null    float64 
 19  flag               196 non-null    category
dtypes: category(1), float64(11), geometry(1), object(7)
memory usage: 29.5+ KB
In [48]:
# acaaaaa ojo --> se esta usando flag
# avoid poblems with fillna()
covid_provYear_map['flag']=covid_provYear_map.flag.astype(str)
  • elimino las columnas que no usaré.
In [49]:
bye=['departamento','provincia', 'CCPP','CCDD', 'conteo_casos_2020', 'conteo_casos_2021', 'conteo_casos_2022', 'conteo_casos_2023', 'conteo_casos_2024']
covid_provYear_map.drop(columns=bye,inplace=True)

# keeping
covid_provYear_map.head()
Out[49]:
OBJECTID DEPARTAMEN PROVINCIA geometry location CASOS_pct_2020 CASOS_pct_2021 CASOS_pct_2022 CASOS_pct_2023 CASOS_pct_2024 flag
0 1.0 AMAZONAS CHACHAPOYAS POLYGON ((-77.72614 -5.94354, -77.72486 -5.943... AMAZONAS+CHACHAPOYAS 0.000608 0.000899 0.001656 0.000035 0.000024 both
1 2.0 AMAZONAS BAGUA POLYGON ((-78.61909 -4.51001, -78.61802 -4.510... AMAZONAS+BAGUA 0.002468 0.000678 0.001028 0.000024 0.000011 both
2 3.0 AMAZONAS BONGARA POLYGON ((-77.72759 -5.1403, -77.72361 -5.1406... AMAZONAS+BONGARA 0.000110 0.000189 0.000222 0.000007 0.000009 both
3 4.0 AMAZONAS CONDORCANQUI POLYGON ((-77.81399 -2.99278, -77.81483 -2.995... AMAZONAS+CONDORCANQUI 0.001247 0.000114 0.000141 0.000002 0.000000 both
4 5.0 AMAZONAS LUYA POLYGON ((-78.13023 -5.9037, -78.13011 -5.9041... AMAZONAS+LUYA 0.000134 0.000219 0.000276 0.000013 0.000002 both
In [50]:
# filling with zeroes

covid_provYear_map.fillna(0,inplace=True)
In [51]:
import os

covid_provYear_map.to_file(os.path.join('maps',"provinciasPeru.gpkg"), layer='provincias_PC', driver="GPKG")

Explorando el año 2021¶

In [52]:
# statistics

covid_provYear_map.CASOS_pct_2021.describe()
Out[52]:
count    196.000000
mean       0.001205
std        0.006644
min        0.000016
25%        0.000122
50%        0.000236
75%        0.000652
max        0.091328
Name: CASOS_pct_2021, dtype: float64
In [53]:
import seaborn as sea

sea.boxplot(covid_provYear_map.CASOS_pct_2021, color='yellow',orient='h')
Out[53]:
<Axes: xlabel='CASOS_pct_2021'>
No description has been provided for this image

--> Interpretación:

  • Este boxplot del porcentaje de casos de COVID-19 en el 2021 muestra los valores de CASOS_pct_2021 en diferentes provincias. En resumen, la mayoría de las provincias tuvieron un porcentaje bajo de casos, pero hubo algunas con porcentajes notablemente más altos, indicando posibles áreas críticas o focos de contagio.
In [54]:
from sklearn.preprocessing import QuantileTransformer
qt = QuantileTransformer(n_quantiles=100, random_state=0,output_distribution='normal')
qt_result=qt.fit_transform(covid_provYear_map[['CASOS_pct_2021']])
sea.boxplot(qt_result, color='yellow',orient='h')
Out[54]:
<Axes: >
No description has been provided for this image

Aclaración: En el segundo gráfico, el QuantileTransformer ha transformado los datos de los porcentajes de casos de COVID-19 en 2021 en una distribución normal. Esta transformación hace que los valores se distribuyan alrededor de 0, con algunos valores extremos visibles a la izquierda y derecha.

In [55]:
covid_provYear_map['CASOS_2021_qt']=qt_result
In [56]:
# mmm es para modificar de cierta manera los valores atipicos 

covid_provYear_map['CASOS_2021_qt']
Out[56]:
0      0.851840
1      0.733602
2     -0.207325
3     -0.750835
4     -0.063341
         ...   
191    0.194210
192    1.313104
193   -1.545312
194    0.119499
195   -5.199338
Name: CASOS_2021_qt, Length: 196, dtype: float64

Spatial Correlation¶

Neighboorhood¶

In [57]:
from libpysal.weights import Queen, Rook, KNN

# rook

w_rook = Rook.from_dataframe(covid_provYear_map,use_index=False)
In [58]:
# queen 

w_queen = Queen.from_dataframe(covid_provYear_map,use_index=False)
In [59]:
# k nearest neighbors - le digo el n° de vecinos mas cercanos con el que quiero trbajar

w_knn = KNN.from_dataframe(covid_provYear_map, k=8)
In [60]:
# first one

covid_provYear_map.head(5)
Out[60]:
OBJECTID DEPARTAMEN PROVINCIA geometry location CASOS_pct_2020 CASOS_pct_2021 CASOS_pct_2022 CASOS_pct_2023 CASOS_pct_2024 flag CASOS_2021_qt
0 1.0 AMAZONAS CHACHAPOYAS POLYGON ((-77.72614 -5.94354, -77.72486 -5.943... AMAZONAS+CHACHAPOYAS 0.000608 0.000899 0.001656 0.000035 0.000024 both 0.851840
1 2.0 AMAZONAS BAGUA POLYGON ((-78.61909 -4.51001, -78.61802 -4.510... AMAZONAS+BAGUA 0.002468 0.000678 0.001028 0.000024 0.000011 both 0.733602
2 3.0 AMAZONAS BONGARA POLYGON ((-77.72759 -5.1403, -77.72361 -5.1406... AMAZONAS+BONGARA 0.000110 0.000189 0.000222 0.000007 0.000009 both -0.207325
3 4.0 AMAZONAS CONDORCANQUI POLYGON ((-77.81399 -2.99278, -77.81483 -2.995... AMAZONAS+CONDORCANQUI 0.001247 0.000114 0.000141 0.000002 0.000000 both -0.750835
4 5.0 AMAZONAS LUYA POLYGON ((-78.13023 -5.9037, -78.13011 -5.9041... AMAZONAS+LUYA 0.000134 0.000219 0.000276 0.000013 0.000002 both -0.063341
In [61]:
# amount neighbors of that district

w_rook.neighbors[7]
Out[61]:
[16, 17, 23, 8, 26, 12, 14]
In [62]:
# Crear la base solo con la provincia HUARI en rojo
base = covid_provYear_map[covid_provYear_map.PROVINCIA == "HUARI"].plot(color="red", edgecolor="black")

# Graficar vecinos de HUARI en amarillo
covid_provYear_map.iloc[w_rook.neighbors[7]].plot(ax=base, color="yellow", edgecolor="black")
Out[62]:
<Axes: >
No description has been provided for this image
In [63]:
base=covid_provYear_map[covid_provYear_map.PROVINCIA=="CHACHAPOYAS"].plot()
covid_provYear_map.iloc[w_knn.neighbors[0] ,].plot(ax=base,facecolor="yellow",edgecolor='k')
covid_provYear_map.head(1).plot(ax=base,facecolor="red")
Out[63]:
<Axes: >
No description has been provided for this image
In [64]:
# all the neighbors by row
w_queen.neighbors
Out[64]:
{0: [2, 114, 4, 5, 180, 182, 55],
 1: [3, 60, 61, 6],
 2: [0, 143, 3, 4, 6, 182, 175],
 3: [1, 2, 6, 143],
 4: [0, 2, 6, 55, 56],
 5: [0, 178, 180, 182, 175],
 6: [1, 2, 3, 4, 56, 58, 60],
 7: [16, 17, 23, 8, 26, 12, 14],
 8: [17, 7, 23],
 9: [16, 90, 13],
 10: [16, 26, 12, 13],
 11: [96, 129, 128, 16, 17, 20, 23, 89, 91],
 12: [16, 26, 10, 7],
 13: [16, 19, 26, 90, 9, 10],
 14: [24, 17, 26, 7],
 15: [24, 25, 18, 21],
 16: [7, 9, 10, 11, 12, 13, 23, 90, 91],
 17: [128, 23, 7, 8, 11, 14],
 18: [22, 24, 25, 26, 15],
 19: [13, 26, 22, 90, 93],
 20: [128, 129, 11, 134],
 21: [119, 24, 121, 123, 25, 15],
 22: [18, 19, 119, 25, 26, 93],
 23: [16, 17, 7, 8, 11],
 24: [18, 21, 121, 26, 123, 14, 15],
 25: [18, 21, 22, 119, 15],
 26: [7, 10, 12, 13, 14, 18, 19, 22, 24],
 27: [33, 69, 75, 28, 29, 30, 31],
 28: [32, 75, 46, 47, 48, 50, 52, 27, 30],
 29: [48, 33, 73, 27, 31, 30, 41],
 30: [48, 27, 28, 29],
 31: [33, 69, 73, 27, 76, 29],
 32: [42, 52, 28, 46],
 33: [27, 29, 31],
 34: [35, 149, 38, 40, 172],
 35: [34, 36, 37, 38, 39, 40],
 36: [48, 49, 35, 100, 39, 41, 47],
 37: [35, 38, 39],
 38: [34, 35, 37, 39, 168, 73, 74, 172],
 39: [35, 36, 37, 38, 73, 74, 41],
 40: [34, 35, 148, 149, 150],
 41: [48, 49, 36, 39, 73, 29],
 42: [32, 82, 52, 85, 43, 45, 46],
 43: [42, 51, 52, 85],
 44: [51, 85, 47],
 45: [42, 75, 108, 46, 81, 82, 84, 86],
 46: [32, 42, 75, 28, 45],
 47: [36, 100, 101, 44, 48, 50, 51, 85, 28],
 48: [49, 36, 41, 28, 29, 30, 47],
 49: [48, 41, 36],
 50: [51, 52, 28, 47],
 51: [50, 52, 85, 43, 44, 47],
 52: [32, 50, 51, 42, 43, 28],
 53: [64, 54, 55, 57, 122, 59, 62],
 54: [114, 53, 117, 120, 122, 62],
 55: [0, 114, 4, 53, 56, 59, 62],
 56: [65, 4, 6, 55, 58, 59, 124, 125],
 57: [64, 113, 115, 53, 118, 122, 63],
 58: [56, 60, 125, 6],
 59: [64, 65, 53, 55, 56, 63],
 60: [1, 125, 6, 58, 156, 61, 126],
 61: [1, 156, 60],
 62: [114, 53, 54, 55],
 63: [64, 65, 115, 57, 59, 124],
 64: [57, 59, 53, 63],
 65: [56, 59, 124, 63],
 66: [127],
 67: [69, 70, 76, 78, 79],
 68: [71, 72, 73, 76, 78],
 69: [67, 75, 27, 76, 79, 31],
 70: [146, 67, 75, 77, 78, 79],
 71: [169, 68, 72, 73, 74],
 72: [68, 164, 71, 169, 78],
 73: [68, 38, 39, 71, 41, 74, 76, 29, 31],
 74: [169, 38, 39, 168, 73, 71],
 75: [193, 69, 70, 108, 45, 46, 79, 146, 27, 28],
 76: [67, 68, 69, 73, 78, 31],
 77: [78, 146, 70],
 78: [67, 68, 164, 70, 72, 76, 77, 146],
 79: [75, 67, 69, 70],
 80: [103, 136, 81, 82, 83, 84, 85, 86],
 81: [80, 82, 84, 45],
 82: [80, 81, 85, 42, 45],
 83: [80, 99, 85, 102, 136],
 84: [80, 81, 45, 86],
 85: [98, 101, 102, 42, 43, 44, 47, 80, 82, 51, 83],
 86: [80, 84, 103, 108, 45],
 87: [96, 97, 88, 89, 92, 94],
 88: [96, 87, 151, 152, 94],
 89: [96, 97, 87, 11, 91, 92],
 90: [16, 19, 93, 9, 91, 92, 13],
 91: [16, 89, 90, 11, 92],
 92: [194, 142, 176, 87, 184, 89, 90, 91, 93, 94, 95],
 93: [19, 22, 119, 184, 90, 92],
 94: [87, 151, 153, 88, 92, 95],
 95: [192, 194, 153, 92, 94],
 96: [129, 97, 135, 11, 87, 88, 89, 152],
 97: [96, 89, 87],
 98: [101, 100, 85, 102],
 99: [136, 83, 131, 102],
 100: [98, 36, 101, 47],
 101: [98, 100, 85, 47],
 102: [83, 98, 99, 85],
 103: [80, 86, 136, 108, 104, 111],
 104: [103, 136, 106, 108, 111],
 105: [153, 106, 107, 108, 109],
 106: [108, 133, 136, 105, 104, 109, 110],
 107: [153, 151, 105, 109, 110],
 108: [193, 103, 104, 105, 106, 75, 45, 86, 153],
 109: [105, 106, 107, 110],
 110: [130, 132, 133, 151, 106, 107, 109],
 111: [136, 104, 103],
 112: [113, 123, 116, 117],
 113: [112, 117, 118, 57, 122],
 114: [0, 180, 55, 54, 119, 120, 62],
 115: [57, 124, 118, 63],
 116: [112, 121, 123, 117],
 117: [112, 113, 116, 54, 120, 121, 122],
 118: [113, 115, 57],
 119: [114, 180, 21, 22, 184, 121, 120, 93, 25],
 120: [114, 117, 54, 119, 121],
 121: [116, 21, 117, 119, 24, 123, 120],
 122: [113, 117, 53, 54, 57],
 123: [112, 116, 21, 24, 121],
 124: [65, 115, 56, 125, 126, 63],
 125: [60, 56, 58, 124, 126],
 126: [124, 161, 156, 157, 154, 60, 125],
 127: [66, 130, 132, 133, 131],
 128: [17, 11, 20, 134],
 129: [96, 20, 134, 135, 11],
 130: [132, 133, 110, 127],
 131: [136, 99, 133, 127],
 132: [130, 134, 151, 110, 127],
 133: [130, 131, 136, 106, 110, 127],
 134: [128, 129, 132, 20, 135, 151],
 135: [96, 129, 134, 151, 152],
 136: [99, 131, 133, 103, 104, 106, 111, 80, 83],
 137: [144, 139, 140, 141],
 138: [143, 179, 183, 139, 141, 175],
 139: [137, 138, 141, 143],
 140: [144, 137, 141],
 141: [192, 183, 137, 138, 139, 140, 142],
 142: [192, 176, 194, 181, 183, 92, 141],
 143: [2, 3, 138, 139, 175],
 144: [137, 140],
 145: [193, 146, 147, 164, 173],
 146: [193, 145, 164, 70, 75, 77, 78],
 147: [145, 195, 193],
 148: [162, 149, 166, 150, 40, 186, 187],
 149: [34, 162, 148, 40, 172],
 150: [40, 187, 148],
 151: [132, 134, 135, 107, 110, 88, 153, 152, 94],
 152: [96, 151, 88, 135],
 153: [192, 193, 105, 107, 108, 151, 94, 95],
 154: [161, 126, 155, 157, 158, 159],
 155: [154, 156, 157, 159],
 156: [157, 155, 60, 61, 126],
 157: [154, 155, 156, 126],
 158: [160, 161, 154, 159],
 159: [160, 158, 154, 155, 189, 190],
 160: [158, 190, 159],
 161: [154, 126, 158],
 162: [148, 149, 166, 167, 172],
 163: [164, 167, 168, 169, 171, 172],
 164: [163, 72, 169, 171, 173, 78, 145, 146],
 165: [174, 166],
 166: [162, 148, 165, 186, 188],
 167: [162, 163, 170, 171, 172],
 168: [163, 38, 169, 74, 172],
 169: [163, 164, 71, 72, 74, 168],
 170: [167],
 171: [163, 164, 173, 167],
 172: [34, 162, 163, 149, 38, 167, 168],
 173: [145, 171, 164],
 174: [165],
 175: [2, 5, 138, 143, 177, 178, 179, 182],
 176: [177, 178, 180, 181, 184, 92, 142],
 177: [176, 178, 179, 181, 175],
 178: [176, 177, 180, 5, 175],
 179: [177, 181, 183, 138, 175],
 180: [0, 176, 178, 114, 5, 119, 184],
 181: [176, 177, 179, 183, 142],
 182: [0, 2, 5, 175],
 183: [179, 181, 138, 141, 142],
 184: [176, 180, 119, 92, 93],
 185: [186, 187, 188],
 186: [148, 166, 185, 187, 188],
 187: [148, 150, 185, 186, 188],
 188: [185, 186, 187, 166],
 189: [159, 190, 191],
 190: [160, 189, 159],
 191: [189],
 192: [193, 194, 153, 141, 142, 95],
 193: [192, 195, 75, 108, 145, 146, 147, 153],
 194: [192, 92, 142, 95],
 195: [193, 147]}
In [65]:
# the matrix of neighboorhood:

pd.DataFrame(*w_queen.full()).astype(int) # 1 means both are neighbors
Out[65]:
0 1 2 3 4 5 6 7 8 9 ... 186 187 188 189 190 191 192 193 194 195
0 0 0 1 0 1 1 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0
1 0 0 0 1 0 0 1 0 0 0 ... 0 0 0 0 0 0 0 0 0 0
2 1 0 0 1 1 0 1 0 0 0 ... 0 0 0 0 0 0 0 0 0 0
3 0 1 1 0 0 0 1 0 0 0 ... 0 0 0 0 0 0 0 0 0 0
4 1 0 1 0 0 0 1 0 0 0 ... 0 0 0 0 0 0 0 0 0 0
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
191 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 1 0 0 0 0 0 0
192 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 1 1 0
193 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 1 0 0 1
194 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 1 0 0 0
195 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 1 0 0

196 rows × 196 columns

In [66]:
# pct of neighboorhood (density)
w_queen.pct_nonzero
Out[66]:
2.7332361516034984
In [67]:
# a province with NO neighbor?
w_queen.islands
Out[67]:
[]

Moran's correlation¶

In [68]:
# needed for spatial correlation
w_queen.transform = 'R'
In [69]:
pd.DataFrame(*w_queen.full()).sum(axis=1) # 1 means both are neighbors
Out[69]:
0      1.0
1      1.0
2      1.0
3      1.0
4      1.0
      ... 
191    1.0
192    1.0
193    1.0
194    1.0
195    1.0
Length: 196, dtype: float64
In [70]:
from esda.moran import Moran

moranCOVID = Moran(covid_provYear_map['CASOS_2021_qt'], w_queen)
moranCOVID.I,moranCOVID.p_sim
Out[70]:
(0.16803141386703818, 0.001)
  • El resultado que obtuviste del Índice de Moran es ( I = 0.168 ) y un valor p de ( 0.001 ). Esto indica que hay una autocorrelación espacial positiva significativa en tus datos. El índice de Moran ( I ) se encuentra entre -1 y 1; valores cercanos a 1 sugieren que los valores similares están agrupados, mientras que valores negativos indican dispersión. Un valor p menor a 0.05 refuerza que la autocorrelación no es aleatoria y es estadísticamente significativa.
In [71]:
import numpy as np
from splot.esda import moran_scatterplot
import matplotlib.pyplot as plt

# Asegúrate de que moranDENGUE esté definido correctamente
fig, ax = moran_scatterplot(moranCOVID)
ax.set_xlabel('covid_share')
ax.set_ylabel('SpatialLag_covid_share')
plt.show()
No description has been provided for this image
In [76]:
from splot.esda import moran_scatterplot
import matplotlib.pyplot as plt

fig, ax = moran_scatterplot(moranCOVID)
ax.set_xlabel('covid_share')
ax.set_ylabel('SpatialLag_covid_share')
Out[76]:
Text(0, 0.5, 'SpatialLag_covid_share')
No description has been provided for this image

--> Interpretación:

  • El valor 0.17 (indicado en el título) es positivo pero relativamente bajo, lo que sugiere una débil autocorrelación positiva. Esto significa que, en general, las áreas con una alta proporción de casos de COVID tienden a estar cerca de otras áreas con una alta proporción de casos, y las áreas con bajas proporciones están cerca de otras de bajas proporciones, pero esta relación no es muy fuerte.
In [72]:
# The scatterplot with local info

from esda.moran import Moran_Local

# calculate Moran_Local and plot
lisaCOVID = Moran_Local(y=covid_provYear_map['CASOS_2021_qt'], w=w_knn,seed=2021)
fig, ax = moran_scatterplot(lisaCOVID,p=0.05)
ax.set_xlabel('covid_share')
ax.set_ylabel('SpatialLag_covid_share');
No description has been provided for this image
In [73]:
from splot.esda import plot_local_autocorrelation
plot_local_autocorrelation(lisaCOVID, covid_provYear_map,'CASOS_2021_qt')
plt.show()
No description has been provided for this image
In [74]:
# the map with the spots and outliers

from splot.esda import lisa_cluster
f, ax = plt.subplots(1, figsize=(12, 12))
plt.title('Spots and Outliers')
fig = lisa_cluster(lisaCOVID,
                   covid_provYear_map,ax=ax,
                   legend_kwds={'loc': 'center left',
                                'bbox_to_anchor': (0.7, 0.6)})
No description has been provided for this image

--> Interpretación:

  • Cuadrante Alto-Alto (arriba a la derecha): Muestra áreas donde tanto los casos de COVID como sus vecinos cercanos tienen valores altos.

  • Cuadrante Bajo-Bajo (abajo a la izquierda): Muestra áreas donde tanto los casos de COVID como los de sus vecinos son bajos.

  • Cuadrante Alto-Bajo y Bajo-Alto (arriba a la izquierda y abajo a la derecha): Representan áreas de “outliers espaciales”, es decir, donde los valores de los casos de COVID son opuestos a los de sus vecinos (por ejemplo, un área con un valor alto rodeada por áreas con valores bajos o viceversa).

  • Línea de Tendencia: La línea roja indica la tendencia general entre los casos de COVID en un área y el promedio de sus vecinos. La pendiente positiva respalda la autocorrelación positiva débil (pero significativa) detectada por el índice de Moran.

En resumen, aunque existe cierta agrupación de casos similares de COVID (positiva), esta es moderada, lo cual podría indicar que las zonas de alta o baja incidencia están algo agrupadas, pero no con una tendencia fuerte en todo el país. Esto puede deberse a varios factores, como la movilidad, densidad de población o intervenciones sanitarias en 2021.

In [75]:
# quadrant
lisaCOVID.q
Out[75]:
array([1, 1, 2, 2, 2, 2, 1, 4, 3, 3, 3, 3, 3, 3, 1, 3, 3, 4, 4, 3, 3, 3,
       3, 3, 4, 3, 3, 1, 4, 3, 3, 1, 1, 2, 1, 1, 4, 1, 1, 3, 1, 3, 4, 3,
       3, 1, 4, 4, 3, 3, 3, 3, 3, 4, 4, 4, 4, 2, 1, 1, 1, 1, 3, 2, 2, 2,
       1, 4, 2, 1, 4, 2, 4, 4, 4, 1, 2, 2, 4, 1, 4, 2, 3, 2, 2, 3, 1, 4,
       3, 3, 3, 3, 3, 3, 3, 1, 3, 3, 4, 1, 4, 3, 1, 1, 1, 1, 1, 2, 1, 1,
       1, 1, 4, 1, 2, 4, 3, 1, 1, 3, 4, 3, 2, 1, 1, 2, 1, 1, 4, 3, 2, 1,
       1, 1, 1, 3, 2, 4, 1, 2, 1, 2, 2, 2, 2, 4, 2, 3, 1, 2, 1, 4, 3, 1,
       1, 1, 2, 1, 1, 1, 1, 1, 4, 4, 4, 3, 3, 3, 2, 4, 3, 3, 4, 3, 3, 1,
       3, 2, 3, 1, 3, 3, 1, 4, 4, 4, 2, 3, 2, 1, 2, 1, 4, 2, 4, 3])
In [76]:
# significance
lisaCOVID.p_sim
Out[76]:
array([0.447, 0.166, 0.072, 0.106, 0.156, 0.35 , 0.054, 0.164, 0.48 ,
       0.001, 0.078, 0.009, 0.079, 0.037, 0.38 , 0.428, 0.039, 0.418,
       0.158, 0.049, 0.342, 0.134, 0.009, 0.192, 0.253, 0.035, 0.153,
       0.344, 0.154, 0.477, 0.315, 0.487, 0.466, 0.434, 0.055, 0.187,
       0.334, 0.205, 0.162, 0.491, 0.181, 0.2  , 0.138, 0.086, 0.141,
       0.447, 0.491, 0.081, 0.153, 0.205, 0.14 , 0.19 , 0.373, 0.236,
       0.239, 0.499, 0.498, 0.419, 0.286, 0.413, 0.245, 0.08 , 0.43 ,
       0.18 , 0.391, 0.262, 0.001, 0.191, 0.3  , 0.39 , 0.421, 0.455,
       0.349, 0.259, 0.439, 0.116, 0.441, 0.467, 0.495, 0.371, 0.415,
       0.114, 0.299, 0.5  , 0.084, 0.198, 0.227, 0.032, 0.073, 0.051,
       0.001, 0.007, 0.007, 0.025, 0.174, 0.289, 0.11 , 0.211, 0.324,
       0.296, 0.39 , 0.354, 0.376, 0.379, 0.007, 0.02 , 0.026, 0.196,
       0.093, 0.053, 0.001, 0.16 , 0.378, 0.385, 0.242, 0.494, 0.447,
       0.281, 0.249, 0.149, 0.12 , 0.231, 0.272, 0.441, 0.373, 0.059,
       0.027, 0.014, 0.139, 0.205, 0.002, 0.006, 0.013, 0.003, 0.492,
       0.352, 0.032, 0.359, 0.409, 0.21 , 0.5  , 0.383, 0.496, 0.209,
       0.246, 0.309, 0.18 , 0.12 , 0.484, 0.075, 0.447, 0.122, 0.145,
       0.256, 0.049, 0.054, 0.018, 0.023, 0.033, 0.042, 0.028, 0.037,
       0.02 , 0.075, 0.27 , 0.02 , 0.32 , 0.08 , 0.254, 0.435, 0.441,
       0.051, 0.062, 0.061, 0.018, 0.487, 0.018, 0.276, 0.372, 0.265,
       0.133, 0.286, 0.376, 0.356, 0.003, 0.148, 0.347, 0.463, 0.497,
       0.045, 0.017, 0.023, 0.302, 0.356, 0.11 , 0.467])
In [77]:
# quadrant: 1 HH,  2 LH,  3 LL,  4 HL

pd.Series(lisaCOVID.q).value_counts()
Out[77]:
1    61
3    59
4    40
2    36
Name: count, dtype: int64
In [78]:
covid_provYear_map['COVID_quadrant']=[l if p <0.05 else 0 for l,p in zip(lisaCOVID.q,lisaCOVID.p_sim)  ]
covid_provYear_map['COVID_quadrant'].value_counts()
Out[78]:
COVID_quadrant
0    157
1     18
3     14
2      4
4      3
Name: count, dtype: int64
In [79]:
labels = [ '0 no_sig', '1 hotSpot', '2 coldOutlier', '3 coldSpot', '4 hotOutlier']

covid_provYear_map['COVID_quadrant_names']=[labels[i] for i in covid_provYear_map['COVID_quadrant']]

covid_provYear_map['COVID_quadrant_names'].value_counts()
Out[79]:
COVID_quadrant_names
0 no_sig         157
1 hotSpot         18
3 coldSpot        14
2 coldOutlier      4
4 hotOutlier       3
Name: count, dtype: int64
In [80]:
from matplotlib import colors
myColMap = colors.ListedColormap([ 'ghostwhite', 'red', 'green', 'black','orange'])




f, ax = plt.subplots(1, figsize=(12,12))


plt.title('Spots and Outliers')

covid_provYear_map.plot(column='COVID_quadrant_names',
                categorical=True,
                cmap=myColMap,
                linewidth=0.1,
                edgecolor='white',
                legend=True,
                legend_kwds={'loc': 'center left',
                             'bbox_to_anchor': (0.7, 0.6)},
                ax=ax)
# Remove axis
ax.set_axis_off()
# Display the map
plt.show()
No description has been provided for this image
In [81]:
import folium

map1=covid_provYear_map[covid_provYear_map.COVID_quadrant_names=='1 hotSpot']
map2=covid_provYear_map[covid_provYear_map.COVID_quadrant_names=='2 coldOutlier']
map3=covid_provYear_map[covid_provYear_map.COVID_quadrant_names=='3 coldSpot']
map4=covid_provYear_map[covid_provYear_map.COVID_quadrant_names=='4 hotOutlier']

m = map1.explore(
    color="red",
    tooltip=False,  # hide tooltip
    popup=["location"],  # (on-click)
    name="hotSpot"  # name of the layer in the map
)

map2.explore(
    m=m, # notice
    color="green",
    tooltip=False,
    popup=["location"],
    name="coldOutlier"
)

map3.explore(
    m=m,
    color="black",
    tooltip=False,
    popup=["location"],
    name="coldSpot",
)

map4.explore(
    m=m,
    color="orange",
    tooltip=False,
    popup=["location"],
    name="hotOutlier",
)

folium.TileLayer("CartoDB positron", show=False).add_to(m)  # use folium to add alternative tiles
folium.LayerControl(collapsed=True).add_to(m)  # use folium to add layer control

m  # show map
Out[81]:
Make this Notebook Trusted to load map: File -> Trust Notebook

--> Interpretación:

• Los clusters HH indican zonas con altas tasas de COVID cercanas entre sí, probablemente señalando focos de contagio activo.

• Los clusters LL muestran áreas de baja incidencia de COVID, que también pueden estar relacionadas con menor densidad de población o mejor control de la pandemia.

• Los outliers (HL y LH) pueden ser zonas con características o factores de riesgo distintos de sus áreas vecinas (por ejemplo, un área con baja incidencia rodeada de áreas con alta incidencia, o viceversa), que podrían requerir un enfoque diferenciado. Este análisis te permite no solo ver el patrón global, sino también los puntos específicos donde los casos de COVID muestran tendencias inusuales en el contexto espacial.

Bivariate LISA¶

In [85]:
#from esda.moran import Moran_BV, Moran_Local_BV
from esda.moran import Moran_BV

mbi = Moran_BV(covid_provYear_map['CASOS_pct_2021'],  covid_provYear_map['CASOS_pct_2022'],  w_queen)
mbi.I,mbi.p_sim
Out[85]:
(0.10482182079850047, 0.027)
In [86]:
# The scatterplot with local info
from esda.moran import Moran_Local_BV

# calculate Moran_Local and plot
lisaCOVID_bv = Moran_Local_BV(y=covid_provYear_map['CASOS_pct_2021'],
                               x=covid_provYear_map['CASOS_pct_2022'],
                               w=w_queen)

fig, ax = moran_scatterplot(lisaCOVID_bv, p=0.05,aspect_equal=True)

ax.set_xlabel('COVID_2022')
ax.set_ylabel('SpatialLag_COVID_2021')
plt.show()
No description has been provided for this image
In [87]:
covid_provYear_map['COVID_quadrant_21_22']=[l if p <0.05 else 0 for l,p in zip(lisaCOVID_bv.q,lisaCOVID_bv.p_sim)  ]

labels = [ '0 no_sig', '1 hotSpot', '2 coldOutlier', '3 coldSpot', '4 hotOutlier']

covid_provYear_map['COVID_quadrant_21_22_names']=[labels[i] for i in covid_provYear_map['COVID_quadrant_21_22']]
In [88]:
# see new columns

covid_provYear_map
Out[88]:
OBJECTID DEPARTAMEN PROVINCIA geometry location CASOS_pct_2020 CASOS_pct_2021 CASOS_pct_2022 CASOS_pct_2023 CASOS_pct_2024 flag CASOS_2021_qt COVID_quadrant COVID_quadrant_names COVID_quadrant_21_22 COVID_quadrant_21_22_names
0 1.0 AMAZONAS CHACHAPOYAS POLYGON ((-77.72614 -5.94354, -77.72486 -5.943... AMAZONAS+CHACHAPOYAS 0.000608 0.000899 0.001656 3.501724e-05 2.411023e-05 both 0.851840 0 0 no_sig 0 0 no_sig
1 2.0 AMAZONAS BAGUA POLYGON ((-78.61909 -4.51001, -78.61802 -4.510... AMAZONAS+BAGUA 0.002468 0.000678 0.001028 2.353618e-05 1.148106e-05 both 0.733602 0 0 no_sig 0 0 no_sig
2 3.0 AMAZONAS BONGARA POLYGON ((-77.72759 -5.1403, -77.72361 -5.1406... AMAZONAS+BONGARA 0.000110 0.000189 0.000222 6.888638e-06 9.184850e-06 both -0.207325 0 0 no_sig 0 0 no_sig
3 4.0 AMAZONAS CONDORCANQUI POLYGON ((-77.81399 -2.99278, -77.81483 -2.995... AMAZONAS+CONDORCANQUI 0.001247 0.000114 0.000141 1.722159e-06 0.000000e+00 both -0.750835 0 0 no_sig 0 0 no_sig
4 5.0 AMAZONAS LUYA POLYGON ((-78.13023 -5.9037, -78.13011 -5.9041... AMAZONAS+LUYA 0.000134 0.000219 0.000276 1.320322e-05 1.722159e-06 both -0.063341 0 0 no_sig 0 0 no_sig
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
191 192.0 TUMBES ZARUMILLA POLYGON ((-80.28521 -3.41276, -80.28406 -3.412... TUMBES+ZARUMILLA 0.000545 0.000309 0.000316 1.320322e-05 1.722159e-06 both 0.194210 1 1 hotSpot 0 0 no_sig
192 193.0 UCAYALI CORONEL PORTILLO POLYGON ((-74.47145 -7.27617, -74.47052 -7.277... UCAYALI+CORONELPORTILLO 0.005265 0.001785 0.002540 3.788751e-05 1.894375e-05 both 1.313104 0 0 no_sig 0 0 no_sig
193 194.0 UCAYALI ATALAYA POLYGON ((-73.18146 -9.41174, -73.13475 -9.411... UCAYALI+ATALAYA 0.000177 0.000041 0.000020 0.000000e+00 0.000000e+00 both -1.545312 0 0 no_sig 0 0 no_sig
194 195.0 UCAYALI PADRE ABAD POLYGON ((-75.43663 -8.22999, -75.43651 -8.230... UCAYALI+PADREABAD 0.000568 0.000275 0.000293 7.462691e-06 5.740531e-07 both 0.119499 0 0 no_sig 0 0 no_sig
195 196.0 UCAYALI PURUS POLYGON ((-70.6138 -9.87339, -70.6214 -9.87808... UCAYALI+PURUS 0.000129 0.000016 0.000014 5.740531e-07 0.000000e+00 both -5.199338 0 0 no_sig 0 0 no_sig

196 rows × 16 columns

In [89]:
from matplotlib import colors
myColMap = colors.ListedColormap([ 'ghostwhite', 'red', 'green', 'black','orange'])




f, ax = plt.subplots(1, figsize=(12,12))


plt.title('Spots and Outliers')

covid_provYear_map.plot(column='COVID_quadrant_21_22_names',
                categorical=True,
                cmap=myColMap,
                linewidth=0.1,
                edgecolor='white',
                legend=True,
                legend_kwds={'loc': 'center left',
                             'bbox_to_anchor': (0.7, 0.6)},
                ax=ax)
# Remove axis
ax.set_axis_off()
# Display the map
plt.show()
No description has been provided for this image

--> Interpretación:

  • Este gráfico es un Bivariate Moran Scatterplot y representa la relación espacial entre dos variables en diferentes años: casos de COVID en 2021 (en el eje vertical) y casos de COVID en 2022 (en el eje horizontal). Vamos a desglosarlo para entender mejor qué significa:

  • Las áreas en rojo (HH) podrían ser zonas críticas que mantuvieron una alta incidencia de COVID de 2021 a 2022.

  • Las áreas en azul claro (LL) muestran estabilidad en bajas tasas de incidencia de COVID en ambas épocas.

Esto puede ayudarte a identificar tendencias de persistencia en casos de COVID, lo cual es útil para estrategias de salud pública enfocadas en reducir la transmisión en zonas con altos valores en ambas variables.